from sklearn.model_selection import StratifiedKFold
from sklearn.datasets import load_breast_cancer

# Load the Wisconsin breast cancer dataset: feature matrix and class labels.
data = load_breast_cancer()
breast_cancer_dataset, breast_cancer_target = data.data, data.target

# Stratified 2-fold cross-validation keeps the class distribution consistent
# across folds. Samples are shuffled before splitting (the fixed random_state
# makes the shuffle reproducible), and each fold serves once as the test set.
kf = StratifiedKFold(n_splits=2, shuffle=True, random_state=1)

for train_index, test_index in kf.split(breast_cancer_dataset,
                                        breast_cancer_target):
    # Pair every label with the slice it describes, then emit the label and
    # its array on consecutive lines.
    sections = (
        ("train_X", breast_cancer_dataset[train_index]),
        ("train_Y", breast_cancer_target[train_index]),
        ("test_X", breast_cancer_dataset[test_index]),
        ("test_Y", breast_cancer_target[test_index]),
    )
    for label, values in sections:
        print(label, values, sep='\n')
